import os
import pandas as pd

def aggiungi_label(dataset, label_df, nome_file, output_dir):
    """Attach a binary ``Label`` column to *dataset* and drop unlabeled rows.

    The first column of *dataset* is treated as the row identifier. For
    matching, that identifier is stripped of surrounding whitespace and of a
    trailing ``F`` or ``I`` (plus any whitespace before it), then compared
    with the stripped ``ID_WSI_unique`` column of *label_df*. Only labels
    that coerce to the numbers 0 or 1 are considered; when an identifier
    occurs more than once in *label_df*, the first valid occurrence wins.

    Rows without a matching label are removed from the result. Their original
    (unmodified) identifiers are written, one per line, to
    ``scartati_<nome_file>.txt`` inside *output_dir*; the file is only
    written when at least one row was discarded.

    Neither *dataset* nor *label_df* is modified.

    Args:
        dataset: DataFrame whose first column holds string identifiers.
        label_df: DataFrame with ``ID_WSI_unique`` and ``Label`` columns.
        nome_file: Tag used in the name of the discarded-rows report.
        output_dir: Existing directory that receives the report.

    Returns:
        A copy of *dataset* restricted to labeled rows, with a ``Label``
        column added.

    Raises:
        KeyError: If *label_df* lacks ``ID_WSI_unique`` or ``Label``.
    """
    dataset = dataset.copy()

    # Label cleanup on a private copy, so the caller's frame is left untouched
    # (the same label table is typically reused across several datasets).
    labels_clean = label_df[["ID_WSI_unique", "Label"]].copy()
    labels_clean["Label"] = pd.to_numeric(labels_clean["Label"], errors="coerce")
    labels_clean["ID_WSI_unique"] = labels_clean["ID_WSI_unique"].str.strip()
    labels_clean = labels_clean[labels_clean["Label"].isin([0, 1])]
    # A missing ID must never match anything, including a missing dataset ID.
    labels_clean = labels_clean.dropna(subset=["ID_WSI_unique"])
    # Keep the first occurrence of each ID so the lookup index is unique.
    lookup = labels_clean.drop_duplicates(
        subset="ID_WSI_unique", keep="first"
    ).set_index("ID_WSI_unique")["Label"]

    # Temporary matching key: trimmed ID without the trailing F/I marker.
    raw_ids = dataset.iloc[:, 0]
    match_key = raw_ids.str.strip().str.replace(r"\s*[FI]$", "", regex=True)

    # Single vectorized lookup instead of scanning label_df once per row.
    matched = match_key.map(lookup)
    pazienti_scartati = raw_ids[matched.isna().to_numpy()].tolist()

    # Assign positionally to avoid any index alignment surprises.
    dataset["Label"] = matched.to_numpy()
    dataset.dropna(subset=["Label"], inplace=True)

    # Save the discarded patients.
    if pazienti_scartati:
        report_path = os.path.join(output_dir, f"scartati_{nome_file}.txt")
        with open(report_path, "w", encoding="utf-8") as f:
            # str() keeps the report writable even when an ID is missing.
            f.writelines(f"{p}\n" for p in pazienti_scartati)

    return dataset

def aggiunta_classi(path_media, path_stats, label_df, output_dir):
    """Label the "media" and "stats" CSV tables and export the results.

    Both input files are read with ``pd.read_csv``, passed through
    ``aggiungi_label`` with the tags ``"media"`` and ``"stats"``, and then
    written to *output_dir* as CSV and as Excel workbooks. *output_dir* is
    created if it does not exist.

    Args:
        path_media: Path of the "media" CSV file.
        path_stats: Path of the "stats" CSV file.
        label_df: Label table forwarded to ``aggiungi_label``.
        output_dir: Directory that receives every output file.

    Returns:
        Tuple ``(media_label_path, stats_label_path)`` with the paths of the
        two CSV files that were written.
    """
    os.makedirs(output_dir, exist_ok=True)

    # (tag, input path, CSV output name, Excel output name)
    jobs = (
        ("media", path_media, "media_label.csv", "media_label.xlsx"),
        ("stats", path_stats, "stats_label.csv", "stats_label.xlsx"),
    )

    # Read every input before labeling anything.
    raw_tables = [pd.read_csv(src) for _, src, _, _ in jobs]

    # Add the classes.
    labelled = [
        aggiungi_label(table, label_df, job[0], output_dir)
        for job, table in zip(jobs, raw_tables)
    ]

    # Save all CSV files first, then all Excel files.
    csv_paths = [os.path.join(output_dir, job[2]) for job in jobs]
    for table, target in zip(labelled, csv_paths):
        table.to_csv(target, index=False)

    for job, table in zip(jobs, labelled):
        table.to_excel(os.path.join(output_dir, job[3]), index=False)

    media_label_path, stats_label_path = csv_paths
    return media_label_path, stats_label_path
